#!/usr/bin/env expect
############################################################################
# Purpose: Test of Slurm functionality
#          validate smd -f (--faulty-nodes) commands.
############################################################################
# Copyright (C) 2011-2014 SchedMD LLC
# Written by Nathan Yee <nyee32@schedmd.com>
# All rights reserved
############################################################################
source ./globals

# Id of the batch job submitted by this test; 0 until sbatch reports one
set job_id     0
# Reason string passed to "smd -d ... -R" when the node is drained
set reason_str "test"
# Node picked from the job's allocation; cleanup only resets it when non-empty
set node_name  ""
# Path of the batch script that is created and submitted below
set file_in    "$test_dir/script"

# smd is only exercised when the "nonstop" plugin is listed in SlurmctldPlugstack
if {![param_contains [get_config_param "SlurmctldPlugstack"] "nonstop"]} {
	skip "This test is only compatible when SlurmctldPlugstack includes nonstop"
}
# The node drained by this test has to be resumed again afterwards
if {![is_super_user]} {
	skip "This test is only suitable for a super user (to restore down nodes)"
}

#
# Undo what the test did: cancel the test job and, if a node was
# selected for draining, set that node's state back to RESUME
#
proc cleanup {} {
	global job_id node_name

	cancel_job $job_id

	# No node was selected, so there is no node state to restore
	if {$node_name eq ""} {
		return
	}
	reset_state $node_name
}

#
# Return the name of one node allocated to a job
#
# job_id - id of the job to look up
#
# The job's node list expression is read from squeue and expanded with
# "scontrol show hostnames"; the last hostname printed is returned.
# Returns an empty string if squeue reported no node list for the job.
#
proc get_node {job_id} {
	global scontrol squeue re_word_str

	# Node list expression of the job as printed by squeue
	set node_list ""
	spawn $squeue -j $job_id --noheader -o NodeList=%N
	expect {
		-re "NodeList=($re_word_str)" {
			set node_list $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "squeue is not responding"
		}
		eof {
			wait
		}
	}

	# Nothing to expand. Do not run scontrol with an empty argument:
	# whatever text it prints in that case could match the generic word
	# pattern below and would be returned as if it were a node name.
	if {$node_list eq ""} {
		return ""
	}

	# Expand the expression; every match overwrites node_name, so the
	# last hostname printed by scontrol is the one that is kept
	set node_name ""
	spawn $scontrol show hostnames $node_list
	expect {
		-re "($re_word_str)" {
			set node_name $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "scontrol is not responding"
		}
		eof {
			wait
		}
	}
	return $node_name
}

#
# Run "scontrol update" to set the state of a node to RESUME
#
# node - name of the node to update
#
proc reset_state {node} {
	global scontrol

	spawn $scontrol update "NodeName=$node" "State=RESUME"
	expect {
		eof { wait }
		timeout { fail "scontrol is not responding" }
	}
}

# Batch script that just sleeps, keeping the job running during the test
make_bash_script $file_in "$bin_sleep 100"

#
# Submit batch script to test smd -f command
#
set job_id 0
spawn $sbatch -N2 --no-kill --output=/dev/null $file_in
expect {
	-re "Submitted batch job ($number)" {
		set job_id $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sbatch is not responding"
	}
	eof {
		wait
	}
}
if {$job_id == 0} {
	fail "sbatch did not submit job"
}

#
# Wait for job to start
#
wait_for_job -fail $job_id "RUNNING"

#
# Pick the node to drain. The remaining steps cannot work without a node
# name, so stop here with a clear message instead of passing an empty
# name on to smd.
#
set node_name [get_node $job_id]
if {$node_name eq ""} {
	fail "Unable to identify a node allocated to job ($job_id)"
}

#
# Run smd -f before any node has been drained: the job is expected
# to report that it has no failed or failing hosts
#
set sub_match 0
spawn $smd -f $job_id
expect {
	timeout {
		fail "smd is not responding"
	}
	eof { wait }
	-re "Job $job_id has no failed or failing hosts" {
		set sub_match 1
		exp_continue
	}
}

# The expected report was never seen in the smd output
if {!$sub_match} {
	fail "Job ($job_id) should not be failing"
}

#
# Drain the selected node of the job with smd -d, giving $reason_str as
# the reason, and confirm that smd reports the node as being drained
#
set sub_match 0
spawn $smd -d $node_name $job_id -R $reason_str
expect {
	-re "Job $job_id node $node_name is being drained" {
		set sub_match 1
		exp_continue
	}
	timeout {
		fail "smd is not responding"
	}
	eof {
		wait
	}
}

# smd never printed the confirmation for this job and node
if {$sub_match != 1} {
	fail "smd did not drain node"
}

#
# Run smd -f again now that one node has been drained: the job is
# expected to report exactly 1 failed or failing host
#
set sub_match 0
spawn $smd -f $job_id
expect {
	timeout {
		fail "smd is not responding"
	}
	eof { wait }
	-re "Job $job_id has 1 failed or failing hosts:" {
		set sub_match 1
		exp_continue
	}
}

# The expected report was never seen in the smd output
if {!$sub_match} {
	fail "Job ($job_id) should have failed or failing hosts"
}
